/**
 * Created on 2017-04-23.
 */
package cn.edu.bjtu.wordseg.impl;

import org.ansj.domain.Result;
import org.ansj.splitWord.analysis.ToAnalysis;
import org.nlpcn.commons.lang.util.StringUtil;

import cn.edu.bjtu.entity.IDocument;
import cn.edu.bjtu.wordseg.DocumentSegmentation;
import cn.edu.bjtu.wordseg.WordSegResult;

/**
 * Implements word segmentation using the Ansj segmentation algorithm.
 * See https://github.com/NLPchina/ansj_seg
 *
 * @author Alex
 */
public class AnsjDocumentAnalyzer extends AbstractDocumentSegmentation {

	/** Underlying Ansj tokenizer; protected so subclasses may replace it. */
	protected ToAnalysis ta = null;

	/**
	 * Creates an analyzer backed by Ansj's standard {@link ToAnalysis} tokenizer.
	 */
	public AnsjDocumentAnalyzer() {
		this.ta = new ToAnalysis();
	}

	/**
	 * Segments the given document's content into space-separated tokens
	 * without part-of-speech tags.
	 *
	 * @param doc the document whose content will be segmented
	 * @return space-separated tokens of the document content
	 */
	@Override
	public String segment(IDocument doc) {
		return segment(doc.getContent());
	}

	/**
	 * Returns the standard space-separated tokens WITHOUT part-of-speech tags.
	 *
	 * @param doc the raw text to segment
	 * @return space-separated tokens
	 * @see cn.edu.bjtu.wordseg.DocumentSegmentation#segment(java.lang.String)
	 */
	@Override
	public String segment(String doc) {
		return ta.parseStr(doc).toStringWithOutNature(" ");
	}

	/**
	 * Segments the given document's content, keeping the full Ansj result.
	 *
	 * @param doc the document whose content will be segmented
	 * @return a {@link WordSegResult} wrapping the segmentation result
	 */
	@Override
	public WordSegResult segmentExtend(IDocument doc) {
		return segmentExtend(doc.getContent());
	}

	/**
	 * Segments raw text and exposes the Ansj {@link Result} through a
	 * {@link WordSegResult} whose {@code toString()} joins terms with a
	 * single space (Ansj's {@code Result#toString(String)} output, which
	 * presumably includes part-of-speech tags — verify against Ansj docs).
	 *
	 * @param doc the raw text to segment
	 * @return a lazily-rendered segmentation result
	 * @see cn.edu.bjtu.wordseg.DocumentSegmentation#segmentExtend(java.lang.String)
	 */
	@Override
	public WordSegResult segmentExtend(String doc) {
		final Result res = ta.parseStr(doc);
		return new WordSegResult() {
			@Override
			public String toString() {
				return res.toString(" ");
			}
		};
	}
}
